###########################
#### table_1.R         ####
#### Generates Table 1 ####
###########################

# County- and state-level lookup tables, used as loaded.
county_info <- import(here("Data", "geo_info", "county_info.rds"))
state_info  <- import(here("Data", "geo_info", "state_info.rds"))

# ZIP-level lookup table. The text label in `urban` is replaced by a numeric
# code from 1 to 5; any value not listed below (including NA) becomes NA
# because case_when() is given no fallback branch.
zip_info <- import(here("Data", "geo_info", "zip_info.rds"))
zip_info <- mutate(
  zip_info,
  urban = case_when(
    urban == "Urban"          ~ 1,
    urban == "Urban-suburban" ~ 2,
    urban == "Suburban"       ~ 3,
    urban == "Rural metro"    ~ 4,
    urban == "Pure Rural"     ~ 5
  )
)

# Variable dictionary driving the table: individual-level rows are dropped,
# then one row is kept per unique (class, level, var2, var_label, desc).
tab_in <- import(here("Data", "var_data.csv"))
tab_in <- tab_in |>
  filter(level != "individual") |>
  distinct(class, level, var2, var_label, desc)

## Summary statistics ##
# For every row of `tab_in`, summarise the column named in `var2` on the data
# set that matches the row's `level`, and return one row holding:
#   n    - number of rows in the data set
#   n_d  - number of distinct non-missing values
#   n_na - number of missing values
#   mean - mean with missing values removed (survey-weighted for "individual")
#   sd   - standard deviation with missing values removed
# followed by the originating `tab_in` row. Rows are stacked into `sum_dat`.
sum_dat <- map_dfr(seq_len(nrow(tab_in)), \(x) {
  # seq_len() (not 1:nrow()) so an empty `tab_in` yields zero iterations
  # instead of iterating over c(1, 0).

  # as.character() guards against `level` arriving as a factor, which
  # switch() would otherwise treat as an integer index.
  level_in <- as.character(tab_in$level[x])

  # Pick the data set for this level. switch() only evaluates the matching
  # arm, and the unnamed last arm fails loudly on an unrecognised level
  # rather than leaving `dta` undefined (or picking up a stale global).
  dta <- switch(
    level_in,
    zip        = zip_info,
    county     = county_info,
    state      = state_info,
    # NOTE(review): `data` is not defined in the visible part of this script
    # and must be supplied by the calling environment; rows with level
    # "individual" are currently filtered out of `tab_in`, so this arm is
    # not reached as the script stands - confirm whether it is still needed.
    individual = data,
    stop(
      "Unknown level '", level_in, "' for variable '", tab_in$var2[x], "'",
      call. = FALSE
    )
  )
  # Only individual-level data is summarised with survey weights.
  is_survey <- level_in == "individual"

  # Column to summarise, as a symbol so it can be injected into summarise().
  vs <- as.symbol(tab_in$var2[x])

  if (is_survey) {
    # Weighted summary; the weight column is `wt_adj`. survey_mean() also
    # emits a standard-error column, which the select() below discards.
    out <- dta |>
      as_survey_design(weights = wt_adj) |>
      summarise(
        n    = n(),
        n_d  = n_distinct({{ vs }}, na.rm = TRUE),
        n_na = sum(is.na({{ vs }})),
        mean = survey_mean({{ vs }}, na.rm = TRUE, vartype = "se"),
        sd   = survey_sd({{ vs }}, na.rm = TRUE)
      )
  } else {
    # Unweighted summary for the geographic lookup tables.
    out <- dta |>
      summarise(
        n    = n(),
        n_d  = n_distinct({{ vs }}, na.rm = TRUE),
        n_na = sum(is.na({{ vs }})),
        mean = mean({{ vs }}, na.rm = TRUE),
        sd   = sd({{ vs }}, na.rm = TRUE)
      )
  }

  # Keep the common statistic columns and attach the variable's metadata.
  out |>
    select(n, n_d, n_na, mean, sd) |>
    bind_cols(slice(tab_in, x))
})

## Table ##
# Render `sum_dat` as a LaTeX booktabs table: one row per variable, with the
# mean and SD rounded to two decimals. The `col.names` entries are positional
# and must stay in the same order as the columns picked by select().
sum_dat |>
  select(var_label, desc, level, n, n_d, n_na, mean, sd) |>
  mutate(across(mean:sd, \(v) round(v, 2))) |>
  kable(
    format    = "latex",
    booktabs  = TRUE,
    col.names = c(
      "Variable", "Description", "Geography",
      "N values", "Unique values", "NA count",
      "Mean", "SD"
    )
  ) |>
  kable_classic()
